function [dependent_variable,regressor_mat,time_indices_without_only_NaN_obs,country_indices_without_only_NaN_obs]=create_regression_matrices_no_NaN(dependent_variable,regressor_mat,data_array_for_regression_stacked_by_variable,pos,country_indicator_names_mapping,time_fixed_effects_dummy,country_fixed_effects_dummy,drop_warning_dummy)
% function [dependent_variable,regressor_mat,time_indices_without_only_NaN_obs,country_indices_without_only_NaN_obs]=create_regression_matrices_no_NaN(dependent_variable,regressor_mat,data_array_for_regression_stacked_by_variable,pos,country_indicator_names_mapping,time_fixed_effects_dummy,country_fixed_effects_dummy,drop_warning_dummy)
% Create dependent variable and regressor matrices for the regression by
%   1. dropping all time points at which no country has a complete (NaN-free)
%      set of regressors and dependent variables,
%   2. dropping all countries that have no complete observation in the
%      remaining time points,
%   3. appending the requested fixed effects (or a constant if no fixed
%      effects are requested) to the regressor matrix.
% Individual NaN entries of retained countries/time points are kept as they are.
% No check for contiguity of the retained time points is performed, i.e. the
% retained sample may contain gaps in the time dimension.
%
% Inputs:
%   - dependent_variable                                [T by n_countries (by n_dependent_variables)] dependent variable matrix
%   - regressor_mat                                     [T by n_countries by nvars] regressor matrix before removing NaNs and adding fixed effects
%   - data_array_for_regression_stacked_by_variable     [T by n_countries by nvars] data matrix including fixed effects
%   - pos                                               [structure] containing variable positions along the third dimension of
%                                                                   data_array_for_regression_stacked_by_variable; the fields
%                                                                   Country_FE_<i>, Time_FE_<t>, and country_ident are read
%   - country_indicator_names_mapping                   [cell]      country names, indexed by the country identifier stored at
%                                                                   pos.country_ident (only read when a drop message is printed)
%   - time_fixed_effects_dummy                          [boolean]   indicator whether time fixed effects are requested
%   - country_fixed_effects_dummy                       [boolean]   indicator whether country fixed effects are requested
%   - drop_warning_dummy                                [boolean]   (optional, default: 0) indicator whether a message is printed
%                                                                   when countries are dropped
%
% Outputs:
%   - dependent_variable                                [T_non_NaN by n_countries_kept (by n_dependent_variables)] dependent variable matrix after dropping time points and countries
%   - regressor_mat                                     [T_non_NaN by n_countries_kept by (nvars + n_added)] regressor matrix after dropping time points and countries
%                                                                   and appending fixed effects or the constant
%   - time_indices_without_only_NaN_obs                 [T_non_NaN by 1] indices of retained time points
%   - country_indices_without_only_NaN_obs              [n_countries_kept by 1] indices of retained countries

% Copyright (C) 2019-2023 Benjamin Born, Francesco D'Ascanio, Gernot J. Mueller, Johannes Pfeifer
%
% This is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% It is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
% GNU General Public License for more details.
% 
% For a copy of the GNU General Public License,
% see <http://www.gnu.org/licenses/>.

if nargin<8
    drop_warning_dummy = 0;
end

time_dim=1;
country_dim=2;
var_dim=3;

%% Stack regressors and dependent variable(s) along the variable dimension
n_dependent_variables=size(dependent_variable,3);
stacked_matrix=cat(3,regressor_mat,dependent_variable);

%% Drop time points at which no country has a full set of variables
% a country-time observation is incomplete if any of its variables is NaN
country_time_NaN_obs=any(isnan(stacked_matrix),var_dim);
timepoints_without_full_set=all(country_time_NaN_obs,country_dim);
time_indices_without_only_NaN_obs=find(~timepoints_without_full_set);
stacked_matrix_without_NaN_timepoints=stacked_matrix(~timepoints_without_full_set,:,:);

%% Drop countries without any complete observation
% NB: cannot affect time points, because only countries without full set of regressors are dropped
country_time_NaN_obs=any(isnan(stacked_matrix_without_NaN_timepoints),var_dim);
country_without_full_set=all(country_time_NaN_obs,time_dim);
% transpose so that the indices are returned as a column vector
country_indices_without_only_NaN_obs=find(~country_without_full_set');

if any(country_without_full_set) && drop_warning_dummy
    % country identifiers are read from the first time point of the full data array
    country_index=find(country_without_full_set);
    dropped_country_identifiers=data_array_for_regression_stacked_by_variable(1,country_index,pos.country_ident);
    fprintf('\n')
    % with several dropped countries, fprintf recycles the format once per country name
    fprintf('I am dropping %s because there are only NaN observations in the sample\n',country_indicator_names_mapping{dropped_country_identifiers})
    fprintf('\n')
end

stacked_matrix_final=stacked_matrix_without_NaN_timepoints(:,country_indices_without_only_NaN_obs,:);

%% Split stacked matrix into regressors and dependent variable(s) again
regressor_mat=stacked_matrix_final(:,:,1:end-n_dependent_variables);
dependent_variable=stacked_matrix_final(:,:,end-n_dependent_variables+1:end);

n_countries=size(dependent_variable,2);

%% add fixed effects
if country_fixed_effects_dummy
    country_FE_indices=collect_fixed_effect_positions(pos,'Country_FE_',country_indices_without_only_NaN_obs);
    country_fixed_effects=data_array_for_regression_stacked_by_variable(time_indices_without_only_NaN_obs,country_indices_without_only_NaN_obs,country_FE_indices);
else
    country_fixed_effects=[];
end

if time_fixed_effects_dummy
    if country_fixed_effects_dummy
        %drop the time FE of the last retained time point to avoid dummy variable trap
        time_indices_with_FE=time_indices_without_only_NaN_obs(1:end-1);
    else
        time_indices_with_FE=time_indices_without_only_NaN_obs;
    end
    time_FE_indices=collect_fixed_effect_positions(pos,'Time_FE_',time_indices_with_FE);
    time_fixed_effects_or_constant=data_array_for_regression_stacked_by_variable(time_indices_without_only_NaN_obs,country_indices_without_only_NaN_obs,time_FE_indices);
elseif ~country_fixed_effects_dummy
    %no fixed effects at all: add constant to regression
    time_fixed_effects_or_constant=ones(size(regressor_mat,1),n_countries);
else
    time_fixed_effects_or_constant=[];
end

% empty blocks are ignored by cat, so only the requested terms are appended
regressor_mat=cat(3,regressor_mat,country_fixed_effects,time_fixed_effects_or_constant);


function positions=collect_fixed_effect_positions(pos,field_prefix,indices)
% function positions=collect_fixed_effect_positions(pos,field_prefix,indices)
% Look up pos.([field_prefix num2str(index)]) for every entry of indices and
% concatenate the results horizontally in the order of indices
%
% Inputs:
%   - pos               [structure] containing variable positions
%   - field_prefix      [string]    field name prefix, e.g. 'Country_FE_'
%   - indices           [vector]    integer indices appended to the prefix
%
% Outputs:
%   - positions         [row vector] positions stored in the requested fields;
%                                    empty if indices is empty

position_cells=cell(1,numel(indices));
for ii=1:numel(indices)
    position_cells{ii}=pos.([field_prefix num2str(indices(ii))]);
end
positions=[position_cells{:}];